/*******************************************************************************

PooledRegions analysis_villagelevelaccess_robust.do

2018.10.31 (DJ) Created
2019.01.24 (DJ) Added the robustness using DH measure

This do file runs <PooledRegions analysis_villagelevelaccess.do> but after dealing with
the agrovets who did not participate in our surveys. As a robustness check on 
the impact of missing agrovets on trvael cost adjusted prices, 
we assign them low vs. high fertilizer prices and choose the one that weakens our results. 
*******************************************************************************/

local k 0
foreach indep in std_google_vil_city_km DH_access_std{
	local ++k
	
		
	/*******************************************************************************
	********************************************************************************
		Kilimanjaro - there are 26 missing agrovets
	********************************************************************************
	*******************************************************************************/
	
	
	/*******************************************************************************
		run Agrovet Data Sets
	*******************************************************************************/
	
	use "${analysis}/Kilimanjaro agrovet_shop_level_censusappended.dta", clear	
	gen AV_missing = (mi(urea_cost) & incensus!=.)
	tab AV_missing
	gsort -AV_missing
	gen AV_missing_ID = _n if AV_missing==1
	sum AV_missing_ID
	
	local priceh = 65000 // p90
	local pricel = 46000 // p10
	/*
	. sum av_urea_rprice_17, d
	
						  av_urea_rprice_17
	-------------------------------------------------------------
		  Percentiles      Smallest
	 1%        42000          40000
	 5%        45000          40000
	10%        46000          42000       Obs                 303
	25%        49000          42000       Sum of Wgt.         303
	
	50%        53000                      Mean           54482.95
							Largest       Std. Dev.      7660.485
	75%        60000          75000
	90%        65000          75000       Variance       5.87e+07
	95%        70000          75000       Skewness       .8577677
	99%        75000          75000       Kurtosis       3.409177
	*/
	
	
	tempfile avrobust
	tempname postname
	
	postfile `postname' AV_missing_ID price coefficient se pvalue numobs using `avrobust'	
	
	pause off
	
	
	
	forvalues i=1/26{
		
		di "------------------------ ------------------------   Agrovet ID `i'  ------------------------------ ----------------- "
		
		foreach price in 46000 65000{
		
			use "${analysis}/Kilimanjaro agrovet_shop_level_censusappended.dta", clear	
			gen AV_missing = (mi(urea_cost) & incensus!=.)
			gsort -AV_missing av_shopid
			gen AV_missing_ID = _n if AV_missing==1
			
			replace av_urea_rprice_17 = `price' if AV_missing_ID == `i'
			
			
			/*******************************************************************************
				run ViltoAV Kilimanjaro
			*******************************************************************************/
			** Step (1): make a pair for each village-agrovet shop
				
				di "step 1 ------------------------------------------------------------------------------------------"
				keep av_ownerid av_shopid av_district av_ward av_village ///
				sellfert sellurea sellDAP sellseeds av_*price* av_*qty* av_*rev* dist_border_km av_urea_17_median_imput
				gen to_merge = 1
				tempfile avshoplist
				save `avshoplist'
				
				use "${analysis}/Kilimanjaro road_quality_census.dta", replace
				keep village_name ward district
				merge 1:1 village_name ward district using "${analysis}/Kilimanjaro Vil_distance_to_borders.dta"
				drop _merge
				gen to_merge = 1
				tempfile villist
				save `villist'
				
				joinby to_merge using `avshoplist'
				drop to_merge
			
				gen has_agrovet_village = 0
				replace has_agrovet_village = 1 if (village_name == av_village) & (ward == av_ward) & (district == av_district)
				tempfile vilavpair
				save `vilavpair'
				
				// calculate distances from each village to each agrovet
				
					* google distances
					use "${analysis}/Kilimanjaro ViltoVil.dta", clear
					keep district_origin ward_origin village_name_origin district_dest ward_dest village_name_dest distance_km duration_hrs
					rename (district_origin ward_origin village_name_origin district_dest ward_dest village_name_dest) ///
						   (district ward village_name av_district av_ward av_village)
					tempfile avgoogle
					save `avgoogle'
				
					use `vilavpair', clear
					merge m:1 district ward village_name av_district av_ward av_village using `avgoogle'
					/*
					Result                           # of obs.
					-----------------------------------------
					not matched                       206,733
						from master                    23,470  (_merge==1) <-- see what we can do here later.
						from using                    183,263  (_merge==2)
				
					matched                           233,600  (_merge==3)
					-----------------------------------------
					*/
					drop if _merge==2
					drop _merge
					rename (distance_km duration_hrs) (google_vil_AVvil_km google_vil_AVvil_hrs)
					tempfile vilpair_googledist
					save `vilpair_googledist'
				
				
				** Bring in travel costs directly measured from Transport Surveys
					use "${analysis}/PooledRegions ViltoVil_KMCOST.dta", replace
					keep if survey_region_O == "KILIMANJARO" & survey_region_D == "KILIMANJARO"
					rename (survey_district_O survey_ward_O survey_village_O survey_district_D survey_ward_D survey_village_D) ///
						   (district ward village_name av_district av_ward av_village)
					keep district ward village_name av_district av_ward av_village DIST_* arcdist_vil_vil_km
					tempfile measuredcost
					save `measuredcost'
					
					use `vilpair_googledist'
					merge m:1 district ward village_name av_district av_ward av_village using `measuredcost'
					drop if _merge==2
					drop _merge
				
				
				** We can compute the travel adjusted prices in three different ways
					
					global numtripeqv 2.6969 // 1trip going + 1trip coming + 0.6969 carrying. 
											// 0.6969 comes from the coefficients of hours in col(2) and (4) of Appendix Table 2
					
					
					* (3) Using Directly Measured Transport Surveys 
					gen DR_adj_price_50kg		=	av_urea_rprice_17 + DIST_cost_USD_direct*2300*${numtripeqv}
			
			
					tempfile vil_av_long
					save `vil_av_long'
			
			
				egen village_id = group(village_name ward district)
				
				
			** Step (3):  For each village, pick the minimum travel-cost adjusted prices 
			
				foreach var in "50kg" {	
				
					// OTHER METHODS OF COMPUTING TRANSPORT COSTS
						// DIRECT TRANSPORT COSTS
						foreach method in DR {
							// pick the minimum price
							bys village_id: egen min_`method'_adj_price_`var'	= min(`method'_adj_price_`var')
							
							// distance, travel costs, and unadjusted price to the best agrovet
							gen km_`method'_adj_`var'		    = google_vil_AVvil_km 		if min_`method'_adj_price_`var' == `method'_adj_price_`var' & !mi(`method'_adj_price_`var')
							gen tripcost_`method'_adj_`var' 	= DIST_cost_USD_direct*2300	if min_`method'_adj_price_`var' == `method'_adj_price_`var' & !mi(`method'_adj_price_`var')
							gen unadjusted_price_`var'_`method' = av_urea_rprice_17 		if min_`method'_adj_price_`var' == `method'_adj_price_`var' & !mi(`method'_adj_price_`var')
						}
						
				}
		
				// collapse to the village level
				collapse (min) min* km_* tripcost_* (mean) unadjusted_price_50kg*, by(village_id village_name ward district)
						 
			// convert to dollars
				foreach var of varlist min*price* unadjusted_price_50kg*{
					replace `var' = `var'/2300
				}
				
			// generate other variables
			foreach var in DR {
				gen onewaycost_`var'_adj_av_urea_usd = tripcost_`var'_adj_50kg/2300
				gen travelcost_`var'_adj_av_urea_usd = ${numtripeqv}*tripcost_`var'_adj_50kg/2300
			}
				
			tempfile Kili_ViltoAV
			save `Kili_ViltoAV'
				
				
				
				
			di "step 2 ------------------------------------------------------------------------------------------"
			/*******************************************************************************
				append Manyara and Kilimanjaro Data sets
			*******************************************************************************/
			// Kilimanjaro
				use `Kili_ViltoAV', clear
				merge 1:1 village_name ward district using "${analysis}/Kilimanjaro google_ViltoCities.dta"
				drop _merge
				
				gen survey_region="Kilimanjaro"
				
				tempfile kili_vilaccess
				save `kili_vilaccess'
				
			// Manyara
				use "${analysis}/Manyara ViltoAV_censusappended.dta", clear
				merge 1:1 village_id using "${analysis}/Manyara ViltoCities_dist.dta"
				drop _merge
			
			append using `kili_vilaccess'
		
			// create market access proxies
				foreach var in city_km city_hrs nearcity_km{
					gen l_google_vil_`var' = log(google_vil_`var')
					egen std_google_vil_`var' = std(google_vil_`var')
				}	
				
				gen DH_access = .
				foreach city in moshi arusha babati dodoma tanga{
					replace DH_access = 0 if !mi(google_vil_`city'_km)
				}
				foreach city in moshi arusha babati dodoma tanga{
					gen tau_`city' = 2.7*(0.9392 + 0.02019*google_vil_`city'_km)/25
					replace DH_access = DH_access + (1 + tau_`city')^(${elasticity})*pop_frac_`city'
				}
				egen DH_access_std = std(DH_access)
				replace DH_access_std = -DH_access_std
			
			
		//	foreach dep in min_DR_adj_price_50kg unadjusted_price_50kg_DR travelcost_DR_adj_av_urea_usd{
			foreach dep in min_DR_adj_price_50kg {
			
				reg `dep' `indep'
				local t = _b[`indep'] / _se[`indep']
				local pvalue = 2 * ttail(e(df_r), abs(`t'))
				local coef = _b[`indep']
				local se = _se[`indep']
				
				post `postname' (`i') (`price') (`coef') (`se') (`pvalue') (`e(N)')
		
			}
			
			di "------------------------ ------------------------END: Agrovet ID `i'  ------------------------------ ----------------- "
			
		}
	
	}
	
	postclose `postname'
	use `avrobust', clear
	bys AV_missing_ID: egen pickprice = min(coefficient)
	
	tempfile avrobust_missing
	save `avrobust_missing'
	
	
	/*****************************************************************************
	******************************************************************************
		Manyara - there is only 1 missing agrovet
	*******************************************************************************
	******************************************************************************/
	
	use "${analysis}/Manyara agrovet_shop_level_censusappended.dta", clear	
	sum av_urea_rprice_17, d
	/*
						  av_urea_rprice_17
	-------------------------------------------------------------
		  Percentiles      Smallest
	 1%        40000          40000
	 5%        48000          45000
	10%        49000          48000       Obs                  48
	25%        50000          49000       Sum of Wgt.          48
	
	50%        55000                      Mean           54727.08
							Largest       Std. Dev.      6114.919
	75%        59000          65000
	90%        65000          65000       Variance       3.74e+07
	95%        65000          65000       Skewness       .7175895
	99%        75000          75000       Kurtosis       4.434348
	*/
	
	
	
	
	tempfile avrobust
	tempname postname
	
	postfile `postname' AV_missing_ID str30 dep_var price coefficient se pvalue numobs using `avrobust'	
	
	
	
	foreach price in 49000 65000{
	
		use "${analysis}/Manyara agrovet_shop_level_censusappended.dta", clear	
		gen AV_missing_ID = 27 if strpos(matching, "incomplete") & mi(av_urea_rprice_17)
		replace av_urea_rprice_17 = `price' if AV_missing_ID==27
		
			keep av_village_id av_ownerid av_shopid av_district av_ward av_village ///
				 av_gps_lat av_gps_long sellfert sellseeds av_*price* av_*qty* av_*rev* dist_border_km av_urea_17_median_imput
			gen to_merge = 1
			
			tempfile avshoplist
			save `avshoplist'
			
			use "${analysis}/Manyara Vil_distance_to_borders.dta", replace
			rename(district ward village_name) (survey_district survey_ward survey_village) 
			tempfile distborder
			save `distborder'
			
			use "${analysis}/Manyara village_GPS.dta", replace
			keep village_id survey_district survey_ward survey_village village_lat village_lon
			merge 1:1 survey_district survey_ward survey_village using `distborder'
			drop _merge
			gen to_merge = 1
			tempfile villist
			save `villist'
			
			joinby to_merge using `avshoplist'
			drop to_merge
			
			gen has_agrovet_village = 0
			replace has_agrovet_village = 1 if village_id == av_village_id
				
			// calculate distances from each village to each agrovet
				
				* (1) geodetic distances
				geodist village_lat village_lon av_gps_lat av_gps_long, gen(arcdist_vil_AVvil_km)
				sort village_id sellfert sellseeds 
				tempfile vilavpair
				save `vilavpair'
				
				* (2) google distances
				import delimited using "${analysis}/V_to_A_Manyara_final_1.csv", clear
				keep district_origin ward_origin village_name_origin district_dest ward_dest village_name_dest distance_km duration_hrs
				drop if mi(distance_km)&mi(duration_hrs)
				rename (district_origin ward_origin village_name_origin district_dest ward_dest village_name_dest) ///
					   (survey_district survey_ward survey_village av_district av_ward av_village)
				tempfile avgoogle
				save `avgoogle'
			
				use `vilavpair', clear
				merge m:1 survey_district survey_ward survey_village av_district av_ward av_village using `avgoogle'
				drop if _merge==2
				drop _merge
				rename (distance_km duration_hrs) (google_vil_AVvil_km google_vil_AVvil_hrs)
				
				foreach var in survey_district survey_village av_district  av_village{
					replace `var' = subinstr(`var', "_", " ",.)
				}
				
				tempfile vilpair_googledist
				save `vilpair_googledist'
			
			
			** Bring in travel costs directly measured from Transport Surveys
				use "${analysis}/PooledRegions ViltoVil_KMCOST.dta", replace
				keep if survey_region_O == "MANYARA" & survey_region_D == "MANYARA"
				rename (survey_district_O survey_ward_O survey_village_O survey_district_D survey_ward_D survey_village_D) ///
					   (survey_district survey_ward survey_village av_district av_ward av_village)
				keep survey_district survey_ward survey_village av_district av_ward av_village DIST_* arcdist_vil_vil_km
	
				tempfile measuredcost
				save `measuredcost'
				
				use `vilpair_googledist'
				merge m:1 survey_district survey_ward survey_village av_district av_ward av_village using `measuredcost'
				drop if _merge==2
				drop _merge	
			
			
			** Fertilizer
				global type urea
				global numtripeqv 2.6969 // 1trip going + 1trip coming + 0.6969 carrying. 
										// 0.6969 comes from the coefficients of hours in col(2) and (4) in Appendix Table 2. 
				
				gen DR_adj_price_50kg		=	av_urea_rprice_17 + DIST_cost_USD_direct*2300*${numtripeqv}
				 
			tempfile vil_av_long
			save `vil_av_long'
			
	
			
		** Step (3):  For each village, pick the minimum travel-cost adjusted prices 
			
			foreach var in "50kg"{
			
				// OTHER METHODS OF COMPUTING TRANSPORT COSTS
					// DIRECT TRANSPORT COSTS
					foreach method in DR{
						// pick the minimum price
						bys village_id: egen min_`method'_adj_price_`var'	= min(`method'_adj_price_`var')
						bys village_id: egen min_`method'_adj_price_`var'_N	= count(`method'_adj_price_`var')
						
						// distance, travel costs, and unadjusted price to the best agrovet
						gen km_`method'_adj_`var'		    = google_vil_AVvil_km 		if min_`method'_adj_price_`var' == `method'_adj_price_`var' & !mi(`method'_adj_price_`var')
						gen tripcost_`method'_adj_`var' 	= DIST_cost_USD_direct*2300	if min_`method'_adj_price_`var' == `method'_adj_price_`var' & !mi(`method'_adj_price_`var')
						gen unadjusted_price_`var'_`method' = av_urea_rprice_17 		if min_`method'_adj_price_`var' == `method'_adj_price_`var' & !mi(`method'_adj_price_`var')
					}	
			}
			
			// collapse to the village level
			collapse  (mean) min* tripcost_* unadjusted_price_*, by(village_id village_lat village_lon)
			
		// convert to dollars
			foreach var of varlist min*price_50kg* unadjusted_price_* {
				replace `var' = `var'/2300
			}
			
		// generate other variables
			foreach var in DR{
				gen onewaycost_`var'_adj_av_urea_usd = tripcost_`var'_adj_50kg/2300
				gen travelcost_`var'_adj_av_urea_usd = ${numtripeqv}*tripcost_`var'_adj_50kg/2300
			}
			
			tempfile Manyara_ViltoAV
			save `Manyara_ViltoAV'	
		
		
			di "step 2 ------------------------------------------------------------------------------------------"
			/*******************************************************************************
				append Manyara and Kilimanjaro Data sets
			*******************************************************************************/
			// Kilimanjaro
				use "${analysis}/Kilimanjaro ViltoAV_censusappended.dta", clear, 
				merge 1:1 village_name ward district using "${analysis}/Kilimanjaro google_ViltoCities.dta"
				drop _merge
				
				gen survey_region="Kilimanjaro"
				
				tempfile kili_vilaccess
				save `kili_vilaccess'
				
			// Manyara
				use `Manyara_ViltoAV', clear
				merge 1:1 village_id using "${analysis}/Manyara ViltoCities_dist.dta"
				drop _merge
			
			append using `kili_vilaccess'
		
			// create market access proxies
				foreach var in city_km city_hrs nearcity_km{
					gen l_google_vil_`var' = log(google_vil_`var')
					egen std_google_vil_`var' = std(google_vil_`var')
				}	
				
				gen DH_access = .
				foreach city in moshi arusha babati dodoma tanga{
					replace DH_access = 0 if !mi(google_vil_`city'_km)
				}
				foreach city in moshi arusha babati dodoma tanga{
					gen tau_`city' = 2.7*(0.9392 + 0.02019*google_vil_`city'_km)/25
					replace DH_access = DH_access + (1 + tau_`city')^(${elasticity})*pop_frac_`city'
				}
				egen DH_access_std = std(DH_access)
				replace DH_access_std = -DH_access_std
	
		
			foreach dep in min_DR_adj_price_50kg unadjusted_price_50kg_DR travelcost_DR_adj_av_urea_usd{
			//foreach dep in min_DR_adj_price_50kg unadjusted_price_50kg_DR travelcost_DR_adj_av_urea_usd{
			
				reg `dep' `indep'
				local t = _b[`indep'] / _se[`indep']
				local pvalue = 2 * ttail(e(df_r), abs(`t'))
				local coef = _b[`indep']
				local se = _se[`indep']
				
				post `postname' (27) ("`dep'") (`price') (`coef') (`se') (`pvalue') (`e(N)')
		
			}
	}
	
	postclose `postname'
	
	use `avrobust', clear
	
	
	/* USING std_google_vil_city_km
	
		 +--------------------------------------------------------------------------------------------+
		 | AV_mis~D                         dep_var   price   coeffi~t         se     pvalue   numobs |
		 |--------------------------------------------------------------------------------------------|
	  1. |       27           min_DR_adj_price_50kg   49000    2.42719   .1565214          0     1069 |
	  2. |       27        unadjusted_price_50kg_DR   49000   1.081121   .0691833          0     1069 |
	  3. |       27   travelcost_DR_adj_av_urea_usd   49000    1.34607    .159903   1.22e-16     1069 |
	  4. |       27           min_DR_adj_price_50kg   65000   2.423879   .1565934          0     1069 |
	  5. |       27        unadjusted_price_50kg_DR   65000   1.096213   .0690159          0     1069 |
		 |--------------------------------------------------------------------------------------------|
	  6. |       27   travelcost_DR_adj_av_urea_usd   65000   1.327666   .1601401   3.36e-16     1069 |
		 +--------------------------------------------------------------------------------------------+
	
		 
		USING DH_access_std
	
		 +--------------------------------------------------------------------------------------------+
		 | AV_mis~D                         dep_var   price   coeffi~t         se     pvalue   numobs |
		 |--------------------------------------------------------------------------------------------|
	  1. |       27           min_DR_adj_price_50kg   49000   2.452934   .1227684          0     1069 |
	  2. |       27        unadjusted_price_50kg_DR   49000   1.281143   .0708699          0     1069 |
	  3. |       27   travelcost_DR_adj_av_urea_usd   49000   1.171791   .1305725   1.26e-18     1069 |
	  4. |       27           min_DR_adj_price_50kg   65000   2.451972   .1227889          0     1069 |
	  5. |       27        unadjusted_price_50kg_DR   65000   1.282568   .0709295          0     1069 |
		 |--------------------------------------------------------------------------------------------|
	  6. |       27   travelcost_DR_adj_av_urea_usd   65000   1.169405   .1305741   1.47e-18     1069 |
		 +--------------------------------------------------------------------------------------------+
	
		 
	*/
	
	
	
	
	/******************************************************************************
	*******************************************************************************
	RUN ALL MISSING AGROVETS ALL TOGETHER
	*******************************************************************************
	******************************************************************************/
	
	local filename missingAVrobust
	
		use `avrobust_missing', clear
		
		keep if pickprice ==coefficient 
		duplicates drop AV_missing_ID , force
		list AV_missing_ID price 
		keep AV_missing_ID price 
		drop if AV_missing_ID==27
		tempfile pickavprice
		save `pickavprice'
		
		use "${analysis}/Kilimanjaro agrovet_shop_level_censusappended.dta", clear	
		gen AV_missing = (mi(urea_cost) & incensus!=.)
		gsort -AV_missing av_shopid
		gen AV_missing_ID = _n if AV_missing==1
		sum av_urea_rprice_17
		
		merge m:1 AV_missing_ID using `pickavprice'
		replace av_urea_rprice_17=price if _merge==3
		drop _merge
		sum av_urea_rprice_17
		
		/*******************************************************************************
			run ViltoAV Kilimanjaro
		*******************************************************************************/
		** Step (1): make a pair for each village-agrovet shop
			
			di "step 1 ------------------------------------------------------------------------------------------"
			keep av_ownerid av_shopid av_district av_ward av_village ///
			sellfert sellurea sellDAP sellseeds av_*price* av_*qty* av_*rev* dist_border_km av_urea_17_median_imput
			gen to_merge = 1
			tempfile avshoplist
			save `avshoplist'
			
			use "${analysis}/Kilimanjaro road_quality_census.dta", replace
			keep village_name ward district
			merge 1:1 village_name ward district using "${analysis}/Kilimanjaro Vil_distance_to_borders.dta"
			drop _merge
			gen to_merge = 1
			tempfile villist
			save `villist'
			
			joinby to_merge using `avshoplist'
			drop to_merge
		
			gen has_agrovet_village = 0
			replace has_agrovet_village = 1 if (village_name == av_village) & (ward == av_ward) & (district == av_district)
			tempfile vilavpair
			save `vilavpair'
			
			// calculate distances from each village to each agrovet
			
				* google distances
				use "${analysis}/Kilimanjaro ViltoVil.dta", clear
				keep district_origin ward_origin village_name_origin district_dest ward_dest village_name_dest distance_km duration_hrs
				rename (district_origin ward_origin village_name_origin district_dest ward_dest village_name_dest) ///
					   (district ward village_name av_district av_ward av_village)
				tempfile avgoogle
				save `avgoogle'
			
				use `vilavpair', clear
				merge m:1 district ward village_name av_district av_ward av_village using `avgoogle'
				/*
				Result                           # of obs.
				-----------------------------------------
				not matched                       206,733
					from master                    23,470  (_merge==1) <-- see what we can do here later.
					from using                    183,263  (_merge==2)
			
				matched                           233,600  (_merge==3)
				-----------------------------------------
				*/
				drop if _merge==2
				drop _merge
				rename (distance_km duration_hrs) (google_vil_AVvil_km google_vil_AVvil_hrs)
				tempfile vilpair_googledist
				save `vilpair_googledist'
			
			
			** Bring in travel costs directly measured from Transport Surveys
				use "${analysis}/PooledRegions ViltoVil_KMCOST.dta", replace
				keep if survey_region_O == "KILIMANJARO" & survey_region_D == "KILIMANJARO"
				rename (survey_district_O survey_ward_O survey_village_O survey_district_D survey_ward_D survey_village_D) ///
					   (district ward village_name av_district av_ward av_village)
				keep district ward village_name av_district av_ward av_village DIST_* arcdist_vil_vil_km
				tempfile measuredcost
				save `measuredcost'
				
				use `vilpair_googledist'
				merge m:1 district ward village_name av_district av_ward av_village using `measuredcost'
				drop if _merge==2
				drop _merge
			
			
			** We can compute the travel adjusted prices in three different ways
				
				global numtripeqv 2.6969 // 1trip going + 1trip coming + 0.6969 carrying. 
										// 0.6969 comes from the coefficients of hours in col(2) and (4) of Appendix Table 2
				
				
				* (3) Using Directly Measured Transport Surveys 
				gen DR_adj_price_50kg		=	av_urea_rprice_17 + DIST_cost_USD_direct*2300*${numtripeqv}
		
		
				tempfile vil_av_long
				save `vil_av_long'
		
		
			egen village_id = group(village_name ward district)
			
			
		** Step (3):  For each village, pick the minimum travel-cost adjusted prices 
		
			foreach var in "50kg" {	
			
				// OTHER METHODS OF COMPUTING TRANSPORT COSTS
					// DIRECT TRANSPORT COSTS
					foreach method in DR {
						// pick the minimum price
						bys village_id: egen min_`method'_adj_price_`var'	= min(`method'_adj_price_`var')
						
						// distance, travel costs, and unadjusted price to the best agrovet
						gen km_`method'_adj_`var'		    = google_vil_AVvil_km 		if min_`method'_adj_price_`var' == `method'_adj_price_`var' & !mi(`method'_adj_price_`var')
						gen tripcost_`method'_adj_`var' 	= DIST_cost_USD_direct*2300	if min_`method'_adj_price_`var' == `method'_adj_price_`var' & !mi(`method'_adj_price_`var')
						gen unadjusted_price_`var'_`method' = av_urea_rprice_17 		if min_`method'_adj_price_`var' == `method'_adj_price_`var' & !mi(`method'_adj_price_`var')
					}
					
			}
	
			// collapse to the village level
			collapse (min) min* km_* tripcost_* (mean) unadjusted_price_50kg*, by(village_id village_name ward district)
					 
		// convert to dollars
			foreach var of varlist min*price* unadjusted_price_50kg*{
				replace `var' = `var'/2300
			}
			
		// generate other variables
		foreach var in DR {
			gen onewaycost_`var'_adj_av_urea_usd = tripcost_`var'_adj_50kg/2300
			gen travelcost_`var'_adj_av_urea_usd = ${numtripeqv}*tripcost_`var'_adj_50kg/2300
		}
			
		tempfile Kili_ViltoAV
		save `Kili_ViltoAV'
			
			
			
			
			
			
			
			
		di "step 2 ------------------------------------------------------------------------------------------"
		/*******************************************************************************
			run vil to AV Manyara
		*******************************************************************************/
		
		use "${analysis}/Manyara agrovet_shop_level_censusappended.dta", clear	
		gen AV_missing_ID = 27 if strpos(matching, "incomplete") & mi(av_urea_rprice_17)
		replace av_urea_rprice_17 = 65000 if AV_missing_ID==27
		
		
			keep av_village_id av_ownerid av_shopid av_district av_ward av_village ///
				 av_gps_lat av_gps_long sellfert sellseeds av_*price* av_*qty* av_*rev* dist_border_km av_urea_17_median_imput
			gen to_merge = 1
			
			tempfile avshoplist
			save `avshoplist'
			
			use "${analysis}/Manyara Vil_distance_to_borders.dta", replace
			rename(district ward village_name) (survey_district survey_ward survey_village) 
			tempfile distborder
			save `distborder'
			
			use "${analysis}/Manyara village_GPS.dta", replace
			keep village_id survey_district survey_ward survey_village village_lat village_lon
			merge 1:1 survey_district survey_ward survey_village using `distborder'
			drop _merge
			gen to_merge = 1
			tempfile villist
			save `villist'
			
			joinby to_merge using `avshoplist'
			drop to_merge
			
			gen has_agrovet_village = 0
			replace has_agrovet_village = 1 if village_id == av_village_id
				
			// calculate distances from each village to each agrovet
				
				* (1) geodetic distances
				geodist village_lat village_lon av_gps_lat av_gps_long, gen(arcdist_vil_AVvil_km)
				sort village_id sellfert sellseeds 
				tempfile vilavpair
				save `vilavpair'
				
				* (2) google distances
				import delimited using "${analysis}/V_to_A_Manyara_final_1.csv", clear
				keep district_origin ward_origin village_name_origin district_dest ward_dest village_name_dest distance_km duration_hrs
				drop if mi(distance_km)&mi(duration_hrs)
				rename (district_origin ward_origin village_name_origin district_dest ward_dest village_name_dest) ///
					   (survey_district survey_ward survey_village av_district av_ward av_village)
				tempfile avgoogle
				save `avgoogle'
			
				use `vilavpair', clear
				merge m:1 survey_district survey_ward survey_village av_district av_ward av_village using `avgoogle'
				drop if _merge==2
				drop _merge
				rename (distance_km duration_hrs) (google_vil_AVvil_km google_vil_AVvil_hrs)
				
				foreach var in survey_district survey_village av_district  av_village{
					replace `var' = subinstr(`var', "_", " ",.)
				}
				
				tempfile vilpair_googledist
				save `vilpair_googledist'
			
			
			** Bring in travel costs directly measured from Transport Surveys
				use "${analysis}/PooledRegions ViltoVil_KMCOST.dta", replace
				keep if survey_region_O == "MANYARA" & survey_region_D == "MANYARA"
				rename (survey_district_O survey_ward_O survey_village_O survey_district_D survey_ward_D survey_village_D) ///
					   (survey_district survey_ward survey_village av_district av_ward av_village)
				keep survey_district survey_ward survey_village av_district av_ward av_village DIST_* arcdist_vil_vil_km
	
				tempfile measuredcost
				save `measuredcost'
				
				use `vilpair_googledist'
				merge m:1 survey_district survey_ward survey_village av_district av_ward av_village using `measuredcost'
				drop if _merge==2
				drop _merge	
			
			
			** Fertilizer
				global type urea
				global numtripeqv 2.6969 // 1trip going + 1trip coming + 0.6969 carrying. 
										// 0.6969 comes from the coefficients of hours in col(2) and (4) in Appendix Table 2. 
				
				gen DR_adj_price_50kg		=	av_urea_rprice_17 + DIST_cost_USD_direct*2300*${numtripeqv}
				 
			tempfile vil_av_long
			save `vil_av_long'
			
	
			
		** Step (3):  For each village, pick the minimum travel-cost adjusted prices 
			
			foreach var in "50kg"{
			
				// OTHER METHODS OF COMPUTING TRANSPORT COSTS
					// DIRECT TRANSPORT COSTS
					foreach method in DR{
						// pick the minimum price
						bys village_id: egen min_`method'_adj_price_`var'	= min(`method'_adj_price_`var')
						bys village_id: egen min_`method'_adj_price_`var'_N	= count(`method'_adj_price_`var')
						
						// distance, travel costs, and unadjusted price to the best agrovet
						gen km_`method'_adj_`var'		    = google_vil_AVvil_km 		if min_`method'_adj_price_`var' == `method'_adj_price_`var' & !mi(`method'_adj_price_`var')
						gen tripcost_`method'_adj_`var' 	= DIST_cost_USD_direct*2300	if min_`method'_adj_price_`var' == `method'_adj_price_`var' & !mi(`method'_adj_price_`var')
						gen unadjusted_price_`var'_`method' = av_urea_rprice_17 		if min_`method'_adj_price_`var' == `method'_adj_price_`var' & !mi(`method'_adj_price_`var')
					}	
			}
			
			// collapse to the village level
			collapse  (mean) min* tripcost_* unadjusted_price_*, by(village_id village_lat village_lon)
			
		// convert to dollars
			foreach var of varlist min*price_50kg* unadjusted_price_* {
				replace `var' = `var'/2300
			}
			
		// generate other variables
			foreach var in DR{
				gen onewaycost_`var'_adj_av_urea_usd = tripcost_`var'_adj_50kg/2300
				gen travelcost_`var'_adj_av_urea_usd = ${numtripeqv}*tripcost_`var'_adj_50kg/2300
			}
			
			tempfile Manyara_ViltoAV
			save `Manyara_ViltoAV'	
			
		
		
		
		
		
		/*******************************************************************************
			append Manyara and Kilimanjaro Data sets
		*******************************************************************************/
		// Kilimanjaro
			use `Kili_ViltoAV', clear
			merge 1:1 village_name ward district using "${analysis}/Kilimanjaro google_ViltoCities.dta"
			drop _merge
			
			gen survey_region="Kilimanjaro"
			
			tempfile kili_vilaccess
			save `kili_vilaccess'
			
		// Manyara
			use `Manyara_ViltoAV'
			merge 1:1 village_id using "${analysis}/Manyara ViltoCities_dist.dta"
			drop _merge
		
		append using `kili_vilaccess'
	
	
		// create market access proxies
				foreach var in city_km city_hrs nearcity_km{
					gen l_google_vil_`var' = log(google_vil_`var')
					egen std_google_vil_`var' = std(google_vil_`var')
				}	
				
				gen DH_access = .
				foreach city in moshi arusha babati dodoma tanga{
					replace DH_access = 0 if !mi(google_vil_`city'_km)
				}
				foreach city in moshi arusha babati dodoma tanga{
					gen tau_`city' = 2.7*(0.9392 + 0.02019*google_vil_`city'_km)/25
					replace DH_access = DH_access + (1 + tau_`city')^(${elasticity})*pop_frac_`city'
				}
				egen DH_access_std = std(DH_access)
				replace DH_access_std = -DH_access_std
	
		
		// run regressions
			reg min_DR_adj_price_50kg `indep'
			sum min_DR_adj_price_50kg if e(sample)
			local mean=r(mean)
			local sd=r(sd)
			
			local replaceappend1 replace
			local replaceappend2 append
			
			quietly outreg2 using "${pool_results}/PooledRegions Villagelevelaccess_`filename'.xls", nonote se symb(***,**,*) `replaceappend`k'' dec(2) ///
			addstat("mean of depvar", `mean', "sd of depvar", `sd') keep(`indep')
			
			foreach dep in unadjusted_price_50kg_DR travelcost_DR_adj_av_urea_usd{
			
				reg `dep' `indep'
				sum `dep' if e(sample)
				local mean=r(mean)
				local sd=r(sd)
				
				quietly outreg2 using "${pool_results}/PooledRegions Villagelevelaccess_`filename'.xls", nonote se symb(***,**,*) append dec(2) ///
				addstat("mean of depvar", `mean', "sd of depvar", `sd') keep(`indep')
			}
}
